Marcelo Ponce, Amit Sandhel (2020). covid19.analytics: An R Package to Obtain, Analyze and Visualize Data from the Corona Virus Disease Pandemic. URL https://arxiv.org/abs/2009.01091

Analysis of current COVID data and predictions

predictionDays <- 90
country <- "Canada"
population <- 37590000
noCache <- TRUE
limNbCases <- 1500
limNbDeaths <- 2000
limNbDeathsYear <- 150000
## [1] "Data dated from:  2022-02-07"

Looking at if new cases can predict hospitalisation 13 days in future.

# 13 jours Test
casCovidParJour <- confirmes_quebec
casCovidParJour <- casCovidParJour[order(casCovidParJour$Date), ]
casCovidParJour$Date <- as.Date(casCovidParJour$Date)
hospTreizeJours <- hosp_quebec

merg <- inner_join(casCovidParJour, hospTreizeJours, by = c("Date"))
merg <- merg[order(merg$Date), ]
# Remove start of pandemy irregular
merg <- merg[-1:-144, ]

newCases <- head(merg$Nb_Nvx_Cas, -13)
icu <- tail(merg$ACT_Si_RSS99, -13)
hosp <- tail(merg$ACT_Hsi_RSS99, -13)

corrNewCasesIcu <- data.frame(newCases, icu)
corrNewCasesHosp <- data.frame(newCases, hosp)

ratioIcu <- mean(newCases / icu)
ratioHosp <- mean(newCases / hosp)

# Mean ratio new cases / hosp + 13 days
# ratioHosp <- 1.97
# ratioIcu <- 10.6


m.icu <- lm(newCases ~ icu, data = corrNewCasesIcu)
coeffIcu <- m.icu$coefficients[2]
interceptIcu <- m.icu$coefficients[1]

m.hosp <- lm(newCases ~ hosp, data = corrNewCasesHosp)
coeffHosp <- m.hosp$coefficients[2]
interceptHosp <- m.hosp$coefficients[1]

merg$Date <- as.Date(merg$Date) + 13
last13Days <- tail(merg, 13)
last13Days$Pred_ACT_Hsi_RSS99 <- as.integer(interceptHosp + (last13Days$Nb_Nvx_Cas/coeffHosp))
last13Days$Pred_ACT_Si_RSS99 <- as.integer(interceptIcu + (last13Days$Nb_Nvx_Cas/coeffIcu))
last13Days <- last13Days %>% select(Date, Nb_Nvx_Cas, ACT_Hsi_RSS99, ACT_Si_RSS99, Pred_ACT_Hsi_RSS99, Pred_ACT_Si_RSS99)
last13Days$ACT_Hsi_RSS99 <- rev(last13Days$ACT_Hsi_RSS99)
last13Days$ACT_Si_RSS99 <- rev(last13Days$ACT_Si_RSS99)

Test de corrélation entre les nouveaux cas et le nombre de patients en soins intensifs 13 jours plus tard

plot(newCases ~ icu, ylab = "Nouveaux cas", xlab = "Soins Internes 13 jours + tard")

plot(rstudent(m.icu) ~ fitted(m.icu),
  ylab = "Résidus de Student",
  xlab = "Valeurs prédites",
  main = "Homogénéité des variances"
)

qqnorm(rstudent(m.icu),
  ylab = "Quantiles observés",
  xlab = "Quantiles théoriques", main = "Normalité des résidus"
)
qqline(rstudent(m.icu))

summary(m.hosp)
## 
## Call:
## lm(formula = newCases ~ hosp, data = corrNewCasesHosp)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5041.4  -409.5    96.9   336.6  9484.0 
## 
## Coefficients:
##               Estimate Std. Error t value             Pr(>|t|)    
## (Intercept) -392.78182   81.96507  -4.792           0.00000217 ***
## hosp           3.54928    0.09747  36.415 < 0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1418 on 509 degrees of freedom
## Multiple R-squared:  0.7226, Adjusted R-squared:  0.7221 
## F-statistic:  1326 on 1 and 509 DF,  p-value: < 0.00000000000000022
summary(m.icu)
## 
## Call:
## lm(formula = newCases ~ icu, data = corrNewCasesIcu)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3230.1 -1020.9   -29.5   733.7 11941.7 
## 
## Coefficients:
##              Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) -1589.601    159.382  -9.974 <0.0000000000000002 ***
## icu            30.483      1.327  22.964 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1887 on 509 degrees of freedom
## Multiple R-squared:  0.5088, Adjusted R-squared:  0.5079 
## F-statistic: 527.3 on 1 and 509 DF,  p-value: < 0.00000000000000022
cor.test(corrNewCasesIcu$newCases, corrNewCasesIcu$icu)
## 
##  Pearson's product-moment correlation
## 
## data:  corrNewCasesIcu$newCases and corrNewCasesIcu$icu
## t = 22.964, df = 509, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.6679218 0.7534554
## sample estimates:
##       cor 
## 0.7133348

Test de corrélation entre les nouveaux cas et le nombre de patients hospitalisés 13 jours plus tard

cor.test(newCases, hosp)
## 
##  Pearson's product-moment correlation
## 
## data:  newCases and hosp
## t = 36.415, df = 509, p-value < 0.00000000000000022
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.8240963 0.8724794
## sample estimates:
##       cor 
## 0.8500716
plot(hosp_quebec$ACT_Si_RSS99, ylim = c(0, max(hosp_quebec$ACT_Si_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations Soins Intensifs", xlab = "Jours", main = "Quebec: Actuel - Soins Intensifs")

previsions(hosp_quebec$Date, hosp_quebec$ACT_Si_RSS99, "Québec - Prévision Soins Intensifs ", "Date", "Patients", 90)

plot(hosp_quebec$ACT_Hsi_RSS99, ylim = c(0, max(hosp_quebec$ACT_Hsi_RSS99)), xlim = c(0, nrow(hosp_quebec)), ylab = "Hospitalisations", xlab = "Jours", main = "Quebec: Actuel - Hospitalisations")

previsions(hosp_quebec$Date, hosp_quebec$ACT_Hsi_RSS99, "Québec - Prévision Hospitalisations ", "Date", "Patients", 90)

plot(confirmes_quebec$Nb_Nvx_Cas, ylim = c(0, max(confirmes_quebec$Nb_Nvx_Cas)), xlim = c(0, nrow(confirmes_quebec)), ylab = "Cas", xlab = "Jours", main = "Quebec : Actuel - Cas")

previsions(confirmes_quebec$Date, confirmes_quebec$Nb_Nvx_Cas, "Québec - Prévision - Cas ", "Date", "Cas", 90)

plot(deces_quebec$total_jour, ylim = c(0, max(deces_quebec$total_jour)), xlim = c(0, nrow(deces_quebec)), ylab = "Décès", xlab = "Jours", main = "Quebec : Actuel - Décès")

previsions(deces_quebec$Date, deces_quebec$total_jour, "Québec - Prévision - Décès ", "Date", "Décès", 90)

plot(canada$total_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, nrow(canada)), ylim = c(0, max(canada$total_deaths, na.rm = T)), main = paste(country, " - Total Morts"))

plot(canada$total_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, nrow(canada)), , ylim = c(0, max(canada$total_cases)), main = paste(country, " - Total Cas"))

plot(canada$new_deaths, ylab = "Nombre Morts", xlab = "Jour", xlim = c(0, nrow(canada)), ylim = c(0, 400), main = paste(country, " - Morts / Jour"))

plot(canada$new_cases, ylab = "Nombre Cas", xlab = "Jour", xlim = c(0, nrow(canada)), main = paste(country, " - Nouveaux Cas / Jour"))

plot(canada$total_cases_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, nrow(canada)), , main = "Total Cases /1 Million Population", col = "green", ylim = c(0, max(usa$total_cases_per_million, na.rm = T)), cex.axis = 0.75)
points(usa$total_cases_per_million, col = "red")
legend(1, 40000, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

plot(canada$total_deaths_per_million, ylab = "Number Deaths", xlab = "Day", xlim = c(0, nrow(canada)), main = "Total Deaths /1 Million Population", col = "green", ylim = c(0, max(usa$total_deaths_per_million, na.rm = T)), cex.axis = 0.75)
points(usa$total_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

plot(canada$new_cases_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, nrow(canada)), main = "New Cases/1 Million Population per Day", col = "green", ylim = c(0, limNbCases), cex.axis = 0.75)
points(usa$new_cases_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

plot(canada$new_deaths_per_million, ylab = "Number Cases", xlab = "Day", xlim = c(0, nrow(canada)), main = "New Deaths/1 Million Population per Day", col = "green", ylim = c(0, 15), cex.axis = 0.75)
points(usa$new_deaths_per_million, col = "red")
legend(1, 800, legend = c("US", "Canada"), col = c("red", "green"), lty = 1:1, cex = 0.8)

tsCan <- ts(canada$new_deaths_per_million, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan), h = 90), sub = "Forecast 90 jours")

tsCan2 <- ts(canada$new_deaths, start = 50, class = "ts")
plot(forecast(auto.arima(tsCan2), h = 90), sub = "Forecast 90 jours", ylab = "Nb de morts par jour", xlab = "Jours")

tsc_us <- tsc %>% filter(Country.Region == country)
tsc_us <- data.frame(t(tsc_us))
tsc_us <- cbind(rownames(tsc_us), data.frame(tsc_us, row.names = NULL))
colnames(tsc_us) <- c("Date", "Confirmed")
tsc_us <- tsc_us[-c(1:4), ]
tsc_us$Date <- ymd(tsc_us$Date)
tsc_us$Confirmed <- as.numeric(tsc_us$Confirmed)
str(tsc_us)
tsd_us <- tsd %>% filter(Country.Region == country)
tsd_us <- data.frame(t(tsd_us))
tsd_us <- cbind(rownames(tsd_us), data.frame(tsd_us, row.names = NULL))
colnames(tsd_us) <- c("Date", "Confirmed")
tsd_us <- tsd_us[-c(1:4), ]
tsd_us$Date <- ymd(tsd_us$Date)
tsd_us$Confirmed <- as.numeric(tsd_us$Confirmed)
str(tsd_us)
qplot(Date, Confirmed, data = tsc_us, main = paste("Covid19 - Number of confirmed Cases in ", country))

qplot(Date, Confirmed, data = tsd_us, main = paste("Covid19 - Number of confirmed deaths in ", country))

ds <- tsc_us$Date
y <- tsc_us$Confirmed
df <- data.frame(ds, y)
colnames(df) <- c("ds", "y")

ds2 <- tsd_us$Date
y2 <- tsd_us$Confirmed
df2 <- data.frame(ds2, y2)
colnames(df2) <- c("ds", "y")

tsd_us$delta <- c(NA, diff(tsd_us$Confirmed, lag = 1))
tsd_us$month <- format(as.Date(tsd_us$Date), "%m")
tsd_us$week <- week(as.Date(tsd_us$Date))
tsd_us$delta[is.na(tsd_us$delta)] <- 0
dfChunked <- df[-(1:100), , drop = FALSE]
m <- prophet(dfChunked, yearly.seasonality = F, daily.seasonality = F)
future <- make_future_dataframe(m, periods = predictionDays)
forcast <- predict(m, future)
plot(m, forcast, xlabel = "Date", ylabel = "Cases") + ggtitle(paste(country, " - Predicted Covid Caseses ", predictionDays, " days"))

dfChunked2 <- df2[-(1:100), , drop = FALSE]
m2 <- prophet(dfChunked2, yearly.seasonality = F, daily.seasonality = F)
future2 <- make_future_dataframe(m2, periods = predictionDays)
forcast2 <- predict(m2, future2)
plot(m2, forcast2, xlabel = "Date", ylabel = "Deaths") + ggtitle(paste(country, " - Predicted Covid Deaths ", predictionDays, " days"))

dyplot.prophet(m, forcast,
  main = paste(country, " - Predicted Covid Cases ", predictionDays, " days - FB Prophet"),
  xlab = "Date", ylab = "Cases"
) %>% dyOptions(maxNumberWidth = 20)
dyplot.prophet(m2, forcast2,
  main = paste(country, " - Predicted Covid Deaths ", predictionDays, " days - FB Prophet"),
  xlab = "Date", ylab = "Deaths"
) %>% dyOptions(maxNumberWidth = 20)

FB Prophet

prophet_plot_components(m, forcast)

US Deaths

prophet_plot_components(m2, forcast2)

l <- nrow(dfChunked)
pred <- forcast$yhat[1:l]
actual <- m$history$y
plot(actual, pred, main = paste(country, " Cases - Prediction vs Actual"))
abline(lm(pred ~ actual), col = "red")

pred2 <- forcast2$yhat[1:l]
actual2 <- m2$history$y
plot(actual2, pred2, main = paste(country, " Deaths - Prediction vs Actual"))
abline(lm(pred2 ~ actual2), col = "red")

summary(lm(pred ~ actual))
## 
## Call:
## lm(formula = pred ~ actual)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -53326  -6136  -1538   5669  51509 
## 
## Coefficients:
##                Estimate  Std. Error t value            Pr(>|t|)    
## (Intercept) 2152.557399  899.538898   2.393               0.017 *  
## actual         0.986869    0.004246 232.439 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14470 on 646 degrees of freedom
## Multiple R-squared:  0.9882, Adjusted R-squared:  0.9882 
## F-statistic: 5.403e+04 on 1 and 646 DF,  p-value: < 0.00000000000000022
summary(lm(pred2 ~ actual2))
## 
## Call:
## lm(formula = pred2 ~ actual2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -101.397   -9.189   -1.579    9.021  128.026 
## 
## Coefficients:
##             Estimate Std. Error t value            Pr(>|t|)    
## (Intercept) 1.988466   2.329540   0.854               0.394    
## actual2     0.998785   0.001162 859.441 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 34.18 on 646 degrees of freedom
## Multiple R-squared:  0.9991, Adjusted R-squared:  0.9991 
## F-statistic: 7.386e+05 on 1 and 646 DF,  p-value: < 0.00000000000000022

rmarkdown::render(“COVID_CANADA.Rmd”)